import facade
from parsel import Selector
from xjlibrary.our_file_dir import BaseDir
from xjlibrary.tools.BaseUrl import BaseUrl

# Filesystem locations used by this script. All of them are derived from this
# file's own location through the project's BaseDir helpers.
# NOTE(review): presumably the absolute directory containing this file -- confirm
# against BaseDir.get_file_dir_absolute.
curpath = BaseDir.get_file_dir_absolute(__file__)
# NOTE(review): looks like an ancestor directory of curpath (two levels up?) --
# confirm the meaning of the -2 argument against BaseDir.get_upper_dir.
TopPath = BaseDir.get_upper_dir(curpath, -2)
# Directory whose files are iterated and parsed by ParaList.para_files().
dirPath = BaseDir.get_new_path(TopPath, "download", "yuetongthesis", "download", "listhtml")
# "db.ini" next to this script; handed to facade.MysqlUtiles together with the
# section name "db".
configfile = BaseDir.get_new_path(curpath, "db.ini")


class ParaList(object):
    """Parse the saved list pages under ``dirPath`` and store their entries in MySQL.

    For every file in ``dirPath`` the ``<li style="height: 200px">`` elements
    below ``/html/body/div[1]/ul`` are read, and one ``(rawid, url, title,
    page)`` row per element is inserted into the ``thesis`` table.
    """

    # Statement used for every file; "insert ignore" is what the original
    # query used, so rows rejected by the table's constraints are skipped.
    _INSERT_SQL = "insert ignore into thesis(`rawid`,`url`,`title`,`page`) values (%s,%s,%s,%s)"

    _LINK_XPATH = './/div/span[@class="spakk01"]/a'

    def __init__(self):
        """Create the logger and the MySQL helper configured from ``configfile``."""
        self.logger = facade.get_streamlogger()
        self.mysqlutils = facade.MysqlUtiles(configfile, "db", logger=self.logger)

    @staticmethod
    def _page_from_path(path):
        """Return the file name of *path* without directories and ``.html``.

        Both ``\\`` and ``/`` are treated as directory separators so the
        result does not depend on the platform that produced the path.
        """
        filename = path.replace("\\", "/").rsplit("/", 1)[-1]
        return filename.replace(".html", "")

    @staticmethod
    def _url_from_onclick(onclick):
        """Strip the ``openUrl('...');`` wrapper from an ``onclick`` value."""
        return onclick.replace("openUrl('", "").replace("');", "")

    def _parse_item(self, li, file):
        """Extract ``(rawid, url, title)`` from one ``<li>`` selector.

        Returns ``None`` (after logging a warning) when the link text, the
        ``onclick`` attribute or the ``articleid`` query parameter is missing,
        so a single malformed entry does not abort the whole run.
        """
        title = li.xpath(self._LINK_XPATH + '/text()').get()
        onclick = li.xpath(self._LINK_XPATH + '/@onclick').get()
        # Selector.get() yields None when the XPath matches nothing.
        if title is None or onclick is None:
            self.logger.warning("skip entry without title/onclick in %s", file)
            return None
        url = self._url_from_onclick(onclick)
        try:
            # presumably a mapping of query key -> list of values; verify
            # against BaseUrl.urlQuery2dic
            rawid = BaseUrl.urlQuery2dic(url)["articleid"][0]
        except (KeyError, IndexError):
            self.logger.warning("skip entry without articleid in %s: %s", file, url)
            return None
        return rawid, url, title.strip()

    def para_files(self):
        """Parse every file in ``dirPath`` and bulk-insert its entries.

        One ``ExeSqlMany`` call is issued per file that yielded at least one
        row; files without any usable entry are logged and skipped.
        """
        for file in BaseDir.get_dir_all_files(dirPath):
            page = self._page_from_path(file)
            select = Selector(text=BaseDir.single_read_file(file))
            items = select.xpath('/html/body/div[1]/ul').css('li[style="height: 200px"]')
            rows = []
            for li in items:
                parsed = self._parse_item(li, file)
                if parsed is None:
                    continue
                rawid, url, title = parsed
                rows.append((rawid, url, title, page))
            if not rows:
                # Nothing to insert; avoid a pointless database round trip.
                self.logger.warning("no entries found in %s", file)
                continue
            self.mysqlutils.ExeSqlMany(self._INSERT_SQL, rows)


if __name__ == "__main__":
    # Script entry point: build the parser and process every saved list page.
    ParaList().para_files()
